##Where's Waldo: Searching for the Hidden Variable of "Real" Corruption
##By: Chiara Superti and Jennifer Pan
##R CODE FOR SECTION 3 AND 4 OF REPLICATION PAPER
############################################################################
############################################################################


############################################################################
##SETUP: Load needed packages
############################################################################

library(foreign)
library(Zelig)
library(MASS)
library(xtable)
library(car)
library(pls)
library(graphics)
library(sandwich)
library(systemfit)

##SETUP: Load data
extension.data <-read.dta("crime_WES_ICVS.dta")
extension.data82 <- extension.data[extension.data$indepdate<1982,]  #get countries indpt after 1982
attach(extension.data82)
sort(isocode)

extension.data82.No.CHE <- subset(extension.data82,isocode!="CHE")  #take out Switzerland
extension.data82.No.CHE$isocode
detach(extension.data82)
attach(extension.data82.No.CHE)
#names(extension.data82.No.CHE)


##SETUP: Renaming data to make it easier to manage
#Data from UNODC
totcrimes <-grandtotalofrecordedcrimes
totdrug <-totalrecordeddrugoffenses 
totbribe <-totalrecordedbriberycrimes
totembez <-totalrecordedembezzlements
totfraud <-totalrecordedfrauds
tothom <-totalrecordedintentionalhomicide
totkid <-totalrecordedkidnappings
totprosec_int_Hom <-totalprosecutedforintentionalhom
totincar <- totalpersonsincarcerated
totpros <- totalmalesprosecuted+totalfemalesprosecuted
totcorr <- totbribe+totembez
totcorsc <- (totcorr-mean(totcorr,na.rm=TRUE))/sd(totcorr,na.rm=TRUE)

#Weighted data from UNODC
weighted_corr <- ((totbribe+totembez)*(totcrimes-mean(totcrimes,na.rm=TRUE)/sd(totcrimes,na.rm=TRUE)))/10000000
weighted_corrsc <- (weighted_corr-mean(weighted_corr,na.rm=TRUE))/sd(weighted_corr,na.rm=TRUE)
totsolve <- totincar/totcrimes
logtotsolve <- log(totsolve)

#Data from WEF (Global Competitiveness Report)
#Averaged responses for 2001-02, 2002-03, 2004-2005
publ_trust <- (public_trust0405+public_trust0203+public_trust0102)/3
imp_exp_corr <- (importexport0405+importexport0203+importexport0102)/3
publi_cont_corr <- (public_contract0203+public_contract0102+public_contract0405)/3
bus_cost_corr <- (business_costs0203+business_costs0102+business_costs0405)/3


############################################################################
##Section 3a: Controling for bias
############################################################################
######Control for variables affecting corruption

####Recorded Measure (UNODC)
#Totcrimes & Totincar: proxy for the quality of the judicial system

reg.UNODC <- lm(totcorr~totincar+totcrimes+totincar*totcrimes+democ8201)
summary(reg.UNODC)
res.UNODC <- residuals(reg.UNODC)  #residuals from UNODC


####Perception Measure (WEF Import-Export)
#Instability: increase instab, decrease trust, increase perception of corruption
#GDP: Inc gdp, inc trust, decrease perception of corruption
#Imports: lower imports less open to the world = no comparison available (bias could go either way)

reg.WEFie <- lm(imp_exp_corr~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+imports8201)
summary(reg.WEFie) 
res.WEFie <- reg.WEFie$residuals[names(res.UNODC)]  #residuals from WEFie to match UNODC


####Perception Measure (WEF Public Contracts)
#All of WEFie, plus
#Democracy: because we're dealing w/ gov't contracts, type of systems likely affects perception
#Public Trust: perception of corruption very much related to how much businessmen trust politicians

reg.WEFpc <- lm(publi_cont_corr~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+democ8201+imports8201+publ_trust)
summary(reg.WEFpc) 
res.WEFpc <- reg.WEFpc$residuals[names(res.UNODC)]  #residuals from WEFpc to match UNODC


####Perception Measure (WEF Business Costs)
#Same as WEFpc

reg.WEFbc <- lm(bus_cost_corr~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+imports8201+democ8201+publ_trust)
summary(reg.WEFbc) 
res.WEFbc <- reg.WEFbc $residuals[names(res.UNODC)]  #residuals from WEFbc to match UNODC


####Perception Measure (ICRG)
#Expert know the political, social, and economic history of a country
#which would influence their perception of corruption

reg.ICRG <-lm(icrg0205~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+
  elf_eth+democ8201+pluralty8201+pres8201)
summary(reg.ICRG)
res.ICRG <- reg.ICRG$residuals[names(res.UNODC)]


####Perception Measure (KKM)
#Include all since this is huge composite index
reg.KKM <-lm(kkm0205~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+
  elf_eth+democ8201+imports8201+pluralty8201+pres8201)
summary(reg.KKM)
res.KKM <- reg.KKM$residuals[names(res.UNODC)]


####Perception Measure (CPI)
#Include all since this is huge composite index
reg.CPI <- lm(cpi0205~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+
  elf_eth+democ8201+imports8201+pluralty8201+pres8201)
summary(reg.CPI)
res.CPI <- reg.CPI$residuals[names(res.UNODC)]

#xtable(cbind(res.WEFie, res.WEFpc, res.WEFbc, res.ICRG, res.KKM, res.CPI, res.UNODC))


############################################################################
##Section 3b: Finding the common demonominator
############################################################################
####KKM vs. UNODC
totcorsc <- (totcorr-mean(totcorr,na.rm=TRUE))/sd(totcorr,na.rm=TRUE)
kkm0205sc <- (kkm0205-mean(kkm0205,na.rm=TRUE))/sd(kkm0205,na.rm=TRUE)
lmOrig1 <- lm(totcorsc~kkm0205sc)
lmOrig1  #0.1394
summary(lmOrig1)

st.res.UNODC <- (res.UNODC - mean(res.UNODC, na.rm=TRUE))/sd(res.UNODC, na.rm=TRUE)
st.res.KKM <- (res.KKM - mean(res.KKM,na.rm=TRUE))/sd(res.KKM,na.rm=TRUE)
lmNew1 <- lm(st.res.UNODC~st.res.KKM)
lmNew1  #0.247
summary(lmNew1)


####CPI vs. UNODC
cpi0205sc <- (cpi0205-mean(cpi0205,na.rm=TRUE))/sd(cpi0205,na.rm=TRUE)
lmOrig2 <- lm(totcorsc~cpi0205sc)
lmOrig2  #0.221

st.res.CPI <- (res.CPI - mean(res.CPI,na.rm=TRUE))/sd(res.CPI,na.rm=TRUE)
lmNew2 <- lm(st.res.UNODC~st.res.CPI)
lmNew2  #0.284
summary(lmNew2)


####ICRG vs. UNODC
icrgsc <- (icrg0205-mean(icrg0205,na.rm=TRUE))/sd(icrg0205,na.rm=TRUE)
lmOrig3 <- lm(totcorsc~icrgsc)
lmOrig3  #0.1631

st.res.ICRG <- (res.ICRG-mean(res.ICRG,na.rm=TRUE))/sd(res.ICRG,na.rm=TRUE)
lmNew3 <- lm(st.res.UNODC~st.res.ICRG)
lmNew3  #0.2169


####WEFie vs. UNODC
WEFiesc <- (imp_exp_corr-mean(imp_exp_corr,na.rm=TRUE))/sd(imp_exp_corr,na.rm=TRUE)
lmOrig4 <- lm(totcorsc~WEFiesc)
lmOrig4  #0.362 (opposite sign because index scale in opposite direction)

st.res.WEFie <- (res.WEFie-mean(res.WEFie,na.rm=TRUE))/sd(res.WEFie,na.rm=TRUE)
lmNew4 <- lm(st.res.UNODC~st.res.WEFie)
lmNew4  #0.441  (opposite sign because index scale in opposite direction)


####WEFpc vs. UNODC
WEFpcsc <- (publi_cont_corr-mean(publi_cont_corr,na.rm=TRUE))/sd(publi_cont_corr,na.rm=TRUE)
lmOrig5 <- lm(totcorsc~WEFpcsc)
lmOrig5  #0.296 (opposite sign because index scale in opposite direction)

st.res.WEFpc <- (res.WEFpc-mean(res.WEFpc,na.rm=TRUE))/sd(res.WEFpc,na.rm=TRUE)
lmNew5 <- lm(st.res.UNODC~st.res.WEFpc)
lmNew5  #0.432 (opposite sign because index scale in opposite direction)

##Increased (Graph for PDF File)
pdf("OriginalvsResidual.pdf", width=6, height=5)
par(mfrow=c(1,2))
plot(totcorsc~WEFpcsc, col="dark red", main="Relationship of Original Measures",xlab="WEFpc",ylab="UNODC",ylim=c(-2,2), xlim=c(-2,2), cex.lab=0.8, cex.main=0.8)
abline(a=coef(lmOrig5)[1], b=coef(lmOrig5)[2], col="red", lwd=2)  #correlation of original
abline(a=0, b=-1, col="forestgreen", lty="dashed", lwd=1.5)  #perfect correlation
legend(x="bottomleft", cex=0.5, legend=c("Correlation of Original Measures","Perfect correlation"), col=c("red", "forestgreen"), lty=c("solid", "dashed"), lwd=c(1.5, 2))
plot(st.res.UNODC~st.res.WEFpc, main="Relationship of Residuals",xlab="WEFpc residuals",ylab="UNODC residuals",col="blue",ylim=c(-2,2), xlim=c(-2,2), cex.lab=0.8, cex.main=0.8)
abline(a=coef(lmNew5)[1],b=coef(lmNew5)[2], col="red", lwd=2) #correlation of residuals
abline(a=0, b=-1, col="forestgreen", lty="dashed", lwd=1.5)  #perfect correlation
legend(x="bottomleft", cex=0.5, legend=c("Correlation of Residuals","Perfect correlation"), col=c("red", "forestgreen"), lty=c("solid", "dashed"), lwd=c(1, 2))
dev.off()


####WEFbc vs. UNODC
bus_cost_corr <- (business_costs0203+business_costs0102+business_costs0405)/3
WEFbcsc <- (bus_cost_corr-mean(bus_cost_corr,na.rm=TRUE))/sd(bus_cost_corr,na.rm=TRUE)
lmOrig6 <- lm(totcorsc~WEFbcsc)
lmOrig6 #0.234  (opposite sign because index scale in opposite direction)

st.res.WEFbc <- (res.WEFbc-mean(res.WEFbc,na.rm=TRUE))/sd(res.WEFbc,na.rm=TRUE)
lmNew6 <- lm(st.res.UNODC~st.res.WEFbc)
lmNew6 #0.279  (opposite sign because index scale in opposite direction)


############################################################################
##Section 4: Creating the Non-Perception Instrumental Variable
############################################################################
########Two-stage model
attach(extension.data82.No.CHE)

reg.tot <- lm(totcorr~totincar+totcrimes+totincar*totcrimes+democ8201)
summary(reg.tot)
residuals1<-reg.tot$residuals
st.res1 <- (residuals1 - mean(residuals1,na.rm=TRUE))/sd(residuals1,na.rm=TRUE)

extension.data82.No.CHE_inst<-cbind(extension.data82.No.CHE[names(st.res1),],st.res1)

attach(extension.data82.No.CHE_inst)
dataz<-as.data.frame(na.omit(cbind(lwdigdppc8201,cpi0205,lavgten_exec8201,lavgten_exec8201sq,democ8201,1,st.res1)))

attach(dataz)
form1 <- list("mu1"=lwdigdppc8201~cpi0205+democ8201,
  "mu2"=cpi0205~st.res1,
  "inst"=~st.res1)
z.out2 <- zelig(formula=form1, model="twosls",data=dataz)
summary(z.out2)

#Model with Instrument
stage1 <- lm(cpi0205~st.res1,data=dataz)
summary(stage1)
stage2 <- lm(lwdigdppc8201~stage1$fitted+democ8201)
b <- summary(stage2)
xtable(b)

#summary(lm(democ8201~st.res1,dataz))

#Model without Instrument
lm_noinst <- lm(lwdigdppc8201~cpi0205+democ8201)
a <- summary(lm_noinst)
xtable(a)


########Bivariate normal
attach(extension.data82.No.CHE)
dep.matrix <- as.matrix(cbind(stage1$fitted,stage1$residuals))
a <- names(stage1$fitted)

data.m <- as.data.frame(cbind(democ8201,lwdigdppc8201,lwdigdppc8201sq,lavgten_exec8201,lavgten_exec8201sq,elf_eth,imports8201,fuel8201,ores8201,pluralty8201,pres8201))

data.m.1 <- data.m[a,]
attach(data.m.1)

biv.reg <- lm(dep.matrix~democ8201+lwdigdppc8201+lwdigdppc8201sq+lavgten_exec8201+lavgten_exec8201sq+elf_eth+imports8201+fuel8201+ores8201+pluralty8201+pres8201)

summary(biv.reg)
